*************************************************************************     									   
* Last change:	17/11/2017                                             
*************************************************************************


*** program setup
* Fix interpreter behaviour at version 14.2 and start from an empty session:
* no data, no macros, no user-defined programs, no output paging.
version 14.2
clear all
macro drop _all
program drop _all
set more off



******LOCALS
******************************************

** sample period used by the yearly loop and by the final append step
** (local macros: they exist only while this do-file is running)
local startyear	1980
local endyear	2000
* first year appended onto the start-year file; derived from startyear so
* that editing startyear cannot leave the two values out of sync
local startplus1 = `startyear' + 1


* Main loop. For every year of the sample period:
*   1. load the cleaned yearly file and merge in education and per-worker means
*   2. build leave-one-out worker and firm averages used as tobit regressors
*   3. impute wages at or above the censoring limit, separately for every
*      edu x sex x AMR cell, from a tobit model
*   4. save vsnr_ano, year and the imputed wage impy to a yearly file
foreach y of numlist `startyear' / `endyear' {
	* forward slashes only: a backslash is a Windows-only separator and acts as
	* an escape character when it directly precedes a macro reference
	use "data/orig/clean`y'.dta", clear
	gen year=`y'


	*merge in consistent education variable (only matched records are kept)
	sort vsnr_ano
	merge 1:1 vsnr_ano using "data/consistenteduc.dta"
	keep if _merge==3
	drop _merge

	*sample restrictions etc.
	do "do-files/data_management/cr_prepare_variables_ado.do"

	*merge in average wage and average share of censored observations for each worker
	*(records that exist only in the using file are dropped)
	sort vsnr_ano
	merge 1:1 vsnr_ano using "data/pmeans1980_2000.dta"
	drop if _merge==2
	drop _merge

	* flag records whose nobs equals 1
	bys vsnr_ano:  gen obs1=(nobs==1)

	*re-calculates average wage and censoring indicator without current (year) observation
	*(evaluates to missing when nobs==1; those records are filled in further below)
	foreach X of varlist lw cens {
		by vsnr_ano: replace i`X'=(nobs*i`X'-`X')/(nobs-1)
	}
	drop nobs

	*average wage, censoring indicator, years of schooling and share university graduates
	*in firm, excluding own worker

	* flag firms (bnr_ano_n groups) that contain a single record
	bys bnr_ano_n:  gen obsfirm1=(_N==1)

	gen edu3=0
	replace edu3=1 if edu==3

	* leave-one-out group mean: (N*mean - own value)/(N-1), missing when N==1
	foreach X of varlist lw cens edu3 school {
		bys bnr_ano_n: egen jt`X'=mean(`X')
		bys bnr_ano_n: replace jt`X'=(_N*jt`X'-`X')/(_N-1)
	}

	*age categories (currently not used below: the agecat loop is disabled)
	gen agecat = 1 if alter>= 16 & alter<=25
	replace agecat = 2 if alter>= 26 & alter<=35
	replace agecat = 3 if alter>= 36 & alter<=45
	replace agecat = 4 if alter>= 46 & alter<=55
	replace agecat = 5 if alter>= 56 & alter<=65


	*********impute wages
	*********************************************

	* impy starts as the observed wage and is overwritten for censored records
	gen impy=lw
	sum

	* regressor lists shared by all tobit calls; icens is inserted between the
	* two lists when it is used, which keeps the original regressor order
	local xbase  "alter fsize fsize2 fsize11"
	local xmeans "ilw jtlw jtcens jtedu3 jtschool obs1 obsfirm1"

	forvalues edu	=1/3{
	* loop over agecat (forvalues agecat=1/5) is intentionally disabled
	forvalues sex	=1/2{
	foreach AMR of numlist 1/50{
		di "year `y'"
		di "edu `edu'"
		*di "agecat `agecat'"
		di "AMR `AMR'"
		di "sex `sex'"

		* condition selecting the current estimation cell
		local cell "edu==`edu' & sex==`sex' & AMR==`AMR'"

		*set mean wages and censoring indicator for individual to categorical mean (mean without current obs) if only one observation per worker
		foreach X in lw cens{
			sum `X' if `cell'
			replace i`X'=r(mean)  if obs1==1 & `cell'
		}

		*set mean wages and other variables at firm to categorical mean if only one employee per firm
		foreach X of varlist lw cens edu3 school {
			sum `X' if `cell'
			replace jt`X'=r(mean) if obsfirm1==1 & `cell'
		}

		* cell size decides whether a tobit is estimated at all
		sum lw if `cell'

		if r(N)>50 {

			* censoring limit: just below the largest wage observed in the
			* whole AMR (all edu and sex groups together)
			sum lw if AMR==`AMR'
			local wmax=r(max)-0.001

			sum icens if `cell'
			* The tobit does not converge when icens is almost always zero,
			* so icens is left out as a regressor if its mean is too low
			* include i.agecat
			if r(mean)<0.005 {
				di "icens regressor dropped because mean of icens<0.005"
				tobit lw `xbase' `xmeans' if `cell', ul(`wmax')
			}
			else          {
				* trial run with icens; capture keeps a failed run from
				* stopping the do-file
				* NOTE(review): the check below reads e(converged) and not the
				* return code, so it relies on e() being set by this trial run
				capture {
					tobit lw `xbase' icens `xmeans' if `cell', ul(`wmax')
				}
				if e(converged)== 0 {
					di "icens regressor dropped because no convergence"
					tobit lw `xbase' `xmeans' if `cell', ul(`wmax')
				}
				else    tobit lw `xbase' icens `xmeans' if `cell', ul(`wmax')

			}

			* tobit lw alter , ul(`wmax')
			di "Fraction censored calculated from estimation output: " e(N_rc) / e(N)
			local sigma= _b[/sigma]
			cap drop xb
			predict xb if e(sample)
			* k = normal cdf evaluated at the standardised censoring limit
			cap drop k
			gen k=normal((`wmax'-xb)/`sigma')
			sum k
			local meank=r(mean)
			* draw is uniform between k and 1, so invnorm(draw) is a standard
			* normal draw truncated from below at the censoring limit
			* NOTE(review): no set seed appears in this file, so the draws
			* depend on the random-number state when the do-file is run
			cap drop draw
			gen draw=k+uniform()*(1-k)
			* keep draw strictly below 1 so that invnorm() stays finite
			replace draw=1-3e-7 if draw>1-3e-7 & draw<.
			replace impy=xb+`sigma'*invnorm(draw) if e(sample) & lw>=e(ulopt)
			* any imputation that is still missing gets the mean imputed value
			qui sum impy if e(sample) & lw>=e(ulopt)
			replace impy = r(mean) if e(sample) & impy==.
			sum impy lw if e(sample), detail
		}
		else {
			* cell too small for estimation: keep the observed wage and say
			* so in the log (previously this message was printed for every
			* cell because it sat outside the else branch)
			replace impy=lw if `cell'
			disp "AMR less than 50 people" `y' `AMR'
		}
	}
	}
	}

	sum impy lw, detail

	* keep only the identifiers and the imputed wage for this year
	keep vsnr_ano year impy
	sort vsnr_ano
	compress
	save "data/impy`y'.dta", replace
}


*append data: stack the yearly imputation files into one data set
use "data/impy`startyear'.dta", clear

* forvalues runs zero times when startplus1 is larger than endyear, so a
* one-year sample period does not append any further file
forvalues y = `startplus1' / `endyear' {
	append using "data/impy`y'.dta"
}

sort vsnr_ano year
save "data/impy`startyear'_`endyear'.dta", replace

*erase data: the yearly files are removed only after the combined file has
*been written, so a failed save cannot destroy the imputation results
foreach y of numlist `startyear' / `endyear' {
	erase "data/impy`y'.dta"
}

* list the remaining data files in the log
dir "data/*.dta"


clear all

exit
